02-06/12/2019
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ tibble 2.1.3 ✔ purrr 0.3.3 ## ✔ readr 1.3.1 ✔ stringr 1.4.0 ## ✔ tibble 2.1.3 ✔ forcats 0.4.0
## ── Conflicts ────────────────────────────────────────────── tidyverse_conflicts() ── ## ✖ magrittr::extract() masks tidyr::extract() ## ✖ dplyr::filter() masks plotly::filter(), stats::filter() ## ✖ kableExtra::group_rows() masks dplyr::group_rows() ## ✖ magrittr::inset() masks ggmap::inset() ## ✖ dplyr::lag() masks stats::lag() ## ✖ purrr::set_names() masks magrittr::set_names()
library(ggthemes)
# for the afternoon, we need to install rgdal.
# install.packages('leaflet')
# Read the fatal police shootings CSV.
dat <- read.csv('data/fatal-police-shootings-data.csv')

# Number of records per `flee` category, with the blank category removed.
new_dat <- dat %>%
  group_by(flee) %>%
  summarise(counts = n()) %>%
  filter(flee != '')

# One bar per `flee` category; bar height is the record count.
ggplot(new_dat, aes(x = flee, y = counts)) +
  geom_col() +
  labs(x = '', y = 'Counts', title = 'People killed by police') +
  theme_clean()

# Column-by-column summary of the raw data.
summary(dat)
## id name date ## Min. : 3 TK TK : 104 2018-04-01: 10 ## 1st Qu.:1125 Michael Johnson : 3 2018-01-06: 9 ## Median :2168 Andrew Kana : 2 2018-02-01: 9 ## Mean :2176 Angel Uolla : 2 2018-06-29: 9 ## 3rd Qu.:3230 Brandon Jones : 2 2015-07-07: 8 ## Max. :4279 Christian Chavez: 2 2015-12-14: 8 ## (Other) :3767 (Other) :3829 ## manner_of_death armed age gender race ## shot :3672 gun :2149 Min. : 6.00 : 3 : 344 ## shot and Tasered: 210 knife : 568 1st Qu.:27.00 F: 174 A: 61 ## unarmed : 255 Median :35.00 M:3705 B: 918 ## : 249 Mean :36.85 H: 651 ## undetermined: 166 3rd Qu.:45.00 N: 62 ## toy weapon : 143 Max. :91.00 O: 36 ## (Other) : 352 NA's :138 W:1810 ## city state signs_of_mental_illness threat_level ## Los Angeles: 58 CA : 597 False:2970 attack :2445 ## Phoenix : 57 TX : 335 True : 912 other :1235 ## Houston : 39 FL : 236 undetermined: 202 ## Las Vegas : 36 AZ : 194 ## Columbus : 32 CO : 133 ## Chicago : 31 GA : 125 ## (Other) :3629 (Other):2262 ## flee body_camera ## : 136 False:3456 ## Car : 614 True : 426 ## Foot : 484 ## Not fleeing:2521 ## Other : 127 ## ##
# Count records for every combination of `flee` and `threat_level`,
# dropping records whose `flee` value is blank.
new_dat <- dat %>%
  group_by(flee, threat_level) %>%
  summarise(counts = n()) %>%
  filter(flee != '')

# Side-by-side (dodged) bars: one group per `flee` category, one bar per
# `threat_level` within the group.
ggplot(data = new_dat, aes(x = flee, y = counts, fill = threat_level)) +
  geom_bar(stat = 'identity', position = 'dodge') +
  labs(x = '', y = 'Counts', title = 'People killed by police') +
  theme_clean()
# Keep only the 2007 rows of gapminder. `year` is compared with a number
# rather than the string '2007', so the match does not rely on implicit
# coercion (assumes `year` is stored as a number, as in the gapminder package).
dat <- gapminder
dat <- dat %>% filter(year == 2007)

# GDP per capita against life expectancy, one semi-transparent point per row.
ggplot(data = dat, aes(gdpPercap, lifeExp)) +
  geom_point(size = 2, alpha = 0.4) +
  labs(x = 'GDP per capita',
       y = 'Life expectancy',
       title = 'GDP per capita and Life expectancy') +
  theme_clean()
# Strongly prefer fixed over scientific notation when printing numbers.
# `scipen` is documented as an integer penalty, so pass a number, not a string.
options(scipen = 999)

# Keep only the 2007 rows; compare `year` with a number, not the string '2007'
# (assumes `year` is stored as a number, as in the gapminder package).
dat <- gapminder
dat <- dat %>% filter(year == 2007)

# GDP per capita against life expectancy; point size is mapped to `pop`.
ggplot(data = dat, aes(gdpPercap, lifeExp)) +
  geom_point(aes(size = pop), alpha = 0.4) +
  labs(x = 'GDP per capita',
       y = 'Life expectancy',
       title = 'GDP per capita and Life expectancy') +
  theme_clean()
# Strongly prefer fixed over scientific notation when printing numbers.
# `scipen` is documented as an integer penalty, so pass a number, not a string.
options(scipen = 999)

# Keep only the 2007 rows; compare `year` with a number, not the string '2007'
# (assumes `year` is stored as a number, as in the gapminder package).
dat <- gapminder
dat <- dat %>% filter(year == 2007)

# GDP per capita against life expectancy; point size is mapped to `pop` and
# point colour to `continent`.
ggplot(data = dat, aes(gdpPercap, lifeExp)) +
  geom_point(aes(size = pop, color = continent), alpha = 0.4) +
  labs(x = 'GDP per capita',
       y = 'Life expectancy',
       title = 'GDP per capita and Life expectancy') +
  theme_clean()
# Strongly prefer fixed over scientific notation when printing numbers.
# `scipen` is documented as an integer penalty, so pass a number, not a string.
options(scipen = 999)

# Mean GDP per capita, life expectancy and population for every
# year/continent pair.
dat <- gapminder
dat <- dat %>%
  group_by(year, continent) %>%
  summarise(avg_gdp = mean(gdpPercap),
            avg_life_exp = mean(lifeExp),
            avg_pop = mean(pop))
# dat <- dat %>% filter(continent == 'Oceania')

# Mean population over time, one coloured line (with points) per continent.
ggplot(data = dat, aes(year, avg_pop)) +
  geom_point(aes(color = continent)) +
  geom_line(aes(color = continent)) +
  labs(x = 'Year',
       y = 'Population',
       title = 'Year and population by continent') +
  theme_clean()
# Strongly prefer fixed over scientific notation when printing numbers.
# `scipen` is documented as an integer penalty, so pass a number, not a string.
options(scipen = 999)

# Mean GDP per capita, life expectancy and population for every
# year/continent pair.
dat <- gapminder
dat <- dat %>%
  group_by(year, continent) %>%
  summarise(avg_gdp = mean(gdpPercap),
            avg_life_exp = mean(lifeExp),
            avg_pop = mean(pop))
# dat <- dat %>% filter(continent == 'Oceania')

# Mean population over time per continent; point size is mapped to the mean
# GDP per capita.
ggplot(data = dat, aes(year, avg_pop)) +
  geom_point(aes(color = continent, size = avg_gdp)) +
  geom_line(aes(color = continent)) +
  labs(x = 'Year',
       y = 'Population',
       title = 'Year and population by continent') +
  theme_clean()
# Strongly prefer fixed over scientific notation when printing numbers.
# `scipen` is documented as an integer penalty, so pass a number, not a string.
options(scipen = 999)

# Mean GDP per capita, life expectancy and population for every
# year/continent pair.
dat <- gapminder
dat <- dat %>%
  group_by(year, continent) %>%
  summarise(avg_gdp = mean(gdpPercap),
            avg_life_exp = mean(lifeExp),
            avg_pop = mean(pop))
# dat <- dat %>% filter(continent == 'Oceania')

# Mean population over time per continent; point size is mapped to the mean
# life expectancy.
ggplot(data = dat, aes(year, avg_pop)) +
  geom_point(aes(color = continent, size = avg_life_exp)) +
  geom_line(aes(color = continent)) +
  labs(x = 'Year',
       y = 'Population',
       title = 'Year and population by continent') +
  theme_clean()
# Strongly prefer fixed over scientific notation when printing numbers.
# `scipen` is documented as an integer penalty, so pass a number, not a string.
options(scipen = 999)

# Restrict gapminder to four Asian countries and compute, per year and
# country, the mean GDP per capita, life expectancy and population.
dat <- gapminder
dat <- dat %>%
  group_by(year, country) %>%
  filter(continent == 'Asia' &
           country %in% c('China', 'Japan', 'Singapore', 'Korea, Rep.')) %>%
  summarise(gdp = mean(gdpPercap),
            life_exp = mean(lifeExp),
            pop = mean(pop))
# dat <- dat %>% filter(continent == 'Oceania')

# GDP per capita over time, one coloured line per country; point size is
# mapped to population.
ggplot(data = dat, aes(year, gdp)) +
  geom_point(aes(color = country, size = pop)) +
  geom_line(aes(color = country)) +
  labs(x = 'Year',
       y = 'GDP per capita',
       title = 'Year and GDP') +
  theme_clean()
# Scatter plot of salary against years since PhD, with points coloured by rank
ggplot(data = salaries,
       mapping = aes(x = yrs_since_phd, y = salary)) +
  geom_point(mapping = aes(color = rank)) +
  labs(
    x = "Years since PhD",
    y = "Salary ($)",
    color = "Rank",
    title = "Academic salary by rank and years since degree"
  ) +
  theme_pubr()
# Scatter plot of salary against years since PhD; colour encodes rank and
# point shape encodes sex
ggplot(data = salaries,
       mapping = aes(x = yrs_since_phd, y = salary)) +
  geom_point(mapping = aes(color = rank, shape = sex),
             alpha = .6,
             size = 3) +
  labs(
    x = "Years since PhD",
    y = "Salary ($)",
    color = "Rank",
    shape = "Gender",
    title = "Academic salary by rank, sex, and years since degree"
  ) +
  theme_pubr()
# Plot experience vs. salary (color represents rank and size represents service)
ggplot(salaries,
       aes(x = yrs_since_phd,
           y = salary,
           color = rank,
           size = yrs_service)) +
  # No constant `size` here: a fixed size set in geom_point() overrides the
  # `size = yrs_service` mapping above, so every point would be drawn alike
  # and the "Years of service" legend would never appear.
  geom_point(alpha = .6) +
  labs(title = "Academic salary by rank, years of service, and years since degree",
       x = "Years since PhD",
       y = "Salary ($)",
       color = "Rank",
       size = "Years of service") +
  theme_pubr()
# Salary histograms, one stacked panel per rank
ggplot(data = salaries, mapping = aes(x = salary)) +
  geom_histogram(color = "white", fill = "lightseagreen") +
  facet_wrap(facets = ~rank, ncol = 1) +
  labs(title = "Salary histograms by rank") +
  theme_pubr()
# Salary histograms (in thousands of dollars) in a grid of panels:
# rows are sex, columns are rank
ggplot(data = salaries, mapping = aes(x = salary / 1000)) +
  geom_histogram(fill = "lightseagreen", color = "white") +
  facet_grid(rows = vars(sex), cols = vars(rank)) +
  labs(
    x = "Salary ($1000)",
    title = "Salary histograms by sex and rank"
  ) +
  theme_pubr()
# Group size, mean salary, standard deviation and standard error for every
# combination of sex, rank and discipline
library(dplyr)
sal_srd <- group_by(salaries, sex, rank, discipline) %>%
  summarise(
    n = n(),
    mean = mean(salary),
    sd = sd(salary),
    se = sd / sqrt(n)
  )
# Check the categories of discipline
levels(salaries$discipline) # A = Theoretical, B = Applied
## [1] "A" "B"
# Replace the discipline codes "A"/"B" with descriptive labels, then print
# the summary table
sal_srd[["discipline"]] <- factor(
  sal_srd[["discipline"]],
  levels = c("A", "B"),
  labels = c("Theoretical", "Applied")
)
sal_srd
## # A tibble: 12 x 7 ## # Groups: sex, rank [6] ## sex rank discipline n mean sd se ## <fct> <fct> <fct> <int> <dbl> <dbl> <dbl> ## 1 Female AsstProf Theoretical 6 72933. 5463. 2230. ## 2 Female AsstProf Applied 5 84190. 9792. 4379. ## 3 Female AssocProf Theoretical 4 72128. 6403. 3201. ## 4 Female AssocProf Applied 6 99436. 14086. 5751. ## 5 Female Prof Theoretical 8 109632. 15095. 5337. ## 6 Female Prof Applied 10 131836. 17504. 5535. ## 7 Male AsstProf Theoretical 18 74270. 4580. 1080. ## 8 Male AsstProf Applied 38 84647. 6900. 1119. ## 9 Male AssocProf Theoretical 22 85049. 10612. 2262. ## 10 Male AssocProf Applied 32 101622. 9608. 1698. ## 11 Male Prof Theoretical 123 120619. 28505. 2570. ## 12 Male Prof Applied 125 133518. 26514. 2372.
# Plot the group means with +/- one standard error bars: one panel per
# rank/discipline combination, sex on the x axis
ggplot(sal_srd, aes(x = sex, y = mean, color = sex)) +
  geom_point(size = 3) +
  geom_errorbar(aes(ymin = mean - se, ymax = mean + se), width = .1) +
  # `labels` is spelled out in full; the abbreviated `label` only works
  # through partial argument matching.
  scale_y_continuous(breaks = seq(70000, 140000, 10000),
                     labels = scales::dollar) +
  facet_grid(. ~ rank + discipline) +
  theme_bw() +
  theme(legend.position = "none",
        panel.grid.major.x = element_blank(),
        panel.grid.minor.y = element_blank()) +
  labs(x = "", y = "",
       title = "Nine month academic salaries by gender, discipline, and rank",
       subtitle = "(Means and standard errors)") +
  scale_color_brewer(palette = "Set1")
www.databrew.cc/exercises3
# Draw the world map. The call is qualified with maps:: because the tidyverse
# attaches purrr, whose map() is an unrelated function; which one a bare
# map() resolves to would depend on package load order.
maps::map('world')
# Reference lines through the origin of the plot: horizontal at y = 0 (red)
# and vertical at x = 0 (blue).
abline(h = 0, col = 'red')
abline(v = 0, col = 'blue')
Choropleth
Choropleth
Point
Point density
Point density
Point density
Administrative
Population cartogram
Elevation maps
library(leaflet)

# Load the mosquito habitat records (note: these are fake data)
mosquito <- rio::import(file = "data/mosquito_habitat.rds")

# List the variable names
names(mosquito)
## [1] "habitat_id" "latitude" "longitude" "anophel_larvae"

# Show rows 4 through 6
mosquito[4:6, ]
## habitat_id latitude longitude anophel_larvae ## 451 MHM-0004 -25.52311 32.80695 2 ## 331 MHM-0005 -25.52289 32.84260 0 ## 459 MHM-0006 -25.52044 32.81731 0
library(leaflet)

# Interactive map with one circle marker per row of `mosquito`, placed at the
# row's longitude/latitude.
leaflet(mosquito, width = 1000, height = 600) %>%
  addTiles() %>% # Add default OpenStreetMap map tiles
  addCircleMarkers(lng = ~longitude, lat = ~latitude)
# Interactive map of the habitats: markers are red where anophel_larvae > 0
# and green otherwise; the popup shows the habitat id and larvae count, and
# the label shows the habitat id.
leaflet(mosquito, width = 1000, height = 600) %>%
  addTiles() %>%
  addCircleMarkers(lng = ~longitude, lat = ~latitude,
                   color = ~ifelse(anophel_larvae > 0, 'red', 'green'),
                   popup = ~paste0('Habitat: ', habitat_id,
                                   ', Number of anophel larvae: ', anophel_larvae),
                   label = ~habitat_id,
                   # FALSE rather than F: F is an ordinary variable that can
                   # be reassigned.
                   radius = 7, stroke = FALSE, fillOpacity = 0.5)
data(quakes)
dat <- quakes

# Marker map of the `quakes` dataset, with the magnitude as both popup and
# label. Every row of `dat` is plotted; pass head(dat, 20) instead to show
# only the first 20 rows.
leaflet(data = dat) %>%
  addTiles() %>%
  addMarkers(~long, ~lat,
             popup = ~as.character(mag),
             label = ~as.character(mag))